The purpose of this workflow is to start exploring the LPS sensitivity data and to run cape on it to see if there any informative interactions.
rm(list = ls())
library(here)
all.fun <- list.files(here("Code"), full.names = TRUE, pattern = ".R")
for(i in 1:length(all.fun)){source(all.fun[i])}
all.packages <- c("pheatmap", "qtl2", "cluster")
load_libraries(all.packages)
load_latest_cape(here("../../../git_repositories/cape"))
#decide if we want to code markers additively, use a heterosis
#model in which AA and BB are 0 and AB is 1 across the whole
#genome, or use a heterosis coding just on chromosome 17, where
#we have observed heterosis previously
#cape_type <- c("additive", "heterosis", "heterosis17")
cape_type = "additive"; results_name = "cape_2ET_females"
#cape_type = "heterosis"; results_name <- "cape_heterosis"
#cape_type = "heterosis17"; results_name <- "cape_heterosis17"
We need to take the non-numeric phenotypes out of the phenotype file and put them into covariates. I also added ‘NA’ to the na.strings argument in the yaml file.
#if this is the first time through, create a new data directory
mod.data.dir <- here("Data", "qtl2_files_mod")
if(!file.exists(mod.data.dir)){
#duplicate all files
file.copy(here("Data", "qtl2_files"), mod.data.dir, recursive = TRUE)
orig.pheno <- read.csv(here("Data", "qtl2_files", "lps_pheno.csv"))
orig.covar <- read.csv(here("Data", "qtl2_files", "lps_covar.csv"))
nonnum.pheno.col <- c("strain", "category")
num.pheno.col <- setdiff(colnames(orig.pheno), nonnum.pheno.col)
nonnum.pheno <- cbind(orig.covar, orig.pheno[,nonnum.pheno.col])
num.pheno <- orig.pheno[,num.pheno.col]
write.table(num.pheno, here("Data", "qtl2_files_mod", "qtl2_files", "lps_pheno.csv"),
sep = ",", quote = FALSE, row.names = FALSE)
write.table(nonnum.pheno, here("Data", "qtl2_files_mod", "qtl2_files", "lps_covar.csv"),
sep = ",", quote = FALSE, row.names = FALSE)
}
#add 'NA' to the na.strings arguments in the yaml file to
#suppress the warning about "NAs introduced by coercion"
lps <- read_cross2(here("Data", "qtl2_files_mod", "qtl2_files", "lps.yaml"))
lps_cape <- qtl2_to_cape(lps)
## Converting sex to numeric.
## Converting cross_direction to numeric.
## Converting strain to numeric.
## Converting category to numeric.
## Converting genoprobs to array...
data_obj <- lps_cape$data_obj
geno_obj <- lps_cape$geno_obj
gene.info <- read.delim(here("Data", "general", "mouse_gene_info.txt"))
male.idx <- which(data_obj$pheno[,"sex"] == 1)
data_obj <- remove_ind(data_obj, male.idx)
Body temperature was measured hourly for seven hours post LPS injection. The following plot shows a heatmap of the temperatures after the injection. There are several patterns. Some temperatures stay fairly steady. Others dip a bit after a few hours, and some really drop in the last couple hours of the experiment. In some animals the temperatures go up initially, and others hold more steady before dropping.
temp.idx <- grep("X", colnames(data_obj$pheno))
temp.label <- gsub("X", "", colnames(data_obj$pheno)[temp.idx])
temp <- data_obj$pheno[,temp.idx]
pheatmap(temp, cluster_cols = FALSE)
k = 7
We can cluster the tamperature profiles hierarchically to see the different patterns. The following heatmap shows the cluster assignments for 7 clusters.
temp_clust <- hclust(dist(temp))
cl <- cutree(temp_clust, k = k)
#reorder cluster based on mean temperature
cl_temp <- lapply(1:k, function(x) rowMeans(temp[which(cl == x),,drop=FALSE]))
#boxplot(cl_temp)
cl_mean <- sapply(cl_temp, mean)
cl_order <- order(cl_mean, decreasing = TRUE)
#boxplot(cl_temp[cl_order])
ordered_cl <- rep(NA, length(cl))
for(cl_k in 1:k){
ordered_cl[which(cl == cl_order[cl_k])] <- cl_k
}
cl_annot <- data.frame("cluster" = as.factor(ordered_cl))
rownames(cl_annot) <- rownames(temp)
pheatmap(temp, annotation_row = cl_annot, cluster_cols = FALSE)
If we plot the profiles out for each cluster independently, we can better see the different profiles. We’ve ordered them here by the mean temperature for each cluster for ease of comparison.
layout.mat <- get.layout.mat(k)
layout(layout.mat)
for(cl.k in 1:k){
plot.new()
plot.window(xlim = c(1,ncol(temp)), ylim = c(min(temp), max(temp)))
cl.idx <- which(ordered_cl == cl.k)
axis(1);mtext("Hrs", side = 1, line = 2.5)
axis(2);mtext("Temp (C)", side = 2, line = 2.5)
mtext(paste("Cluster", cl.k), side = 3)
for(i in 1:length(cl.idx)){
points(1:ncol(temp), temp[cl.idx[i],], col = ordered_cl[cl.idx[i]], type = "b", pch = 16)
}
}
If we plot the first two principal components of the temperature matrix against each other, we see that the clusters separate along the first principal component, which accounts for 84% of the variation in temperature. This suggests that the first prinicipal component might be a good trait for mapping.
temp.decomp <- plot.decomp(temp, plot.results = FALSE)
#temp.decomp <- plot.decomp(temp, cols = data_obj$pheno[,"sex"]+1) #no effect of sex
#I am flipping this so it is positively correlated with temperature
temp.pc <- temp.decomp$u[,1,drop=FALSE]*-1
colnames(temp.pc) <- "Temp_PC1"
rownames(temp.pc) <- rownames(temp)
plot(temp.decomp$u*-1, col = ordered_cl, pch = 16, xlab = "PC1", ylab = "PC2")
#also use the second PC
temp.pc2 <- temp.decomp$u[,2,drop=FALSE]*-1 #also flipping this one for ease of interpretation later
colnames(temp.pc2) <- "Temp_PC2"
rownames(temp.pc2) <- rownames(temp)
data_obj$pheno <- cbind(data_obj$pheno, temp.pc, temp.pc2)
#boxplot(temp.decomp$u[,1]~cl)
The first PC of the temperature matrix is more heavily weighted on later temperature measurements than on earlier temperature measurements.
#quartz(width = 9, height = 5)
par(mfrow = c(2,4))
for(i in 1:ncol(temp)){
plot.with.model(temp[,i], temp.pc, main = colnames(temp)[i], xlab = colnames(temp)[i],
ylab = "Temp PC1")
}
The second PC of the temperature matrix is more heavily weighted on the mid point temperatures.
#quartz(width = 9, height = 5)
par(mfrow = c(2,4))
for(i in 1:ncol(temp)){
plot.with.model(temp[,i], temp.pc2, main = colnames(temp)[i], xlab = colnames(temp)[i],
ylab = "Temp PC2")
}
The first temperature PC is highly correlated with the minimum temperature each mouse achieved, whereas the second temperature PC is highly correlated with the maximum temperature each mouse achieved. Maybe we should use both PCs with cape.
temp.max <- apply(temp, 1, max)
temp.min <- apply(temp, 1, min)
par(mfrow = c(1,2))
plot.with.model(temp.min, temp.pc, main = "Temp PC1 vs. Temperature Miminum")
plot.with.model(temp.max, temp.pc2, main = "Temp PC2 vs. Temperature Maximum")
If we look just at the values of PC1 of the temperature matrix, we see that some animals have negative values. The mice with the most negative values in PC1 are in cluster 1 above. These are the most reistant to LPS. Their temperature barely changes at all. The mice with the most positive values in PC1 are in clusters 6 and 7. These mice had the most dramatic drops in temperature and are the most sensitive to LPS. Thus, the value of this PC describes how sensitive or resistant the mice are to LPS as determined by their temperature profile.
#quartz(width = 9, height = 5.5)
pc.order <- order(temp.pc)
plot(temp.pc[pc.order], type = "h", col = ordered_cl[pc.order], lwd = 3,
main = "PC1 by cluster", ylab = "Temp PC1")
abline(h = 0)
legend("topleft", legend = paste("Cluster", 1:k), lty = 1, lwd = 3, col = 1:k)
plot.dim <- par("usr")
plot.height = plot.dim[4] - plot.dim[3]
plot.width <- plot.dim[2] - plot.dim[1]
arrow.height <- (plot.dim[3]+(plot.height*0.05))
text.height <- (plot.dim[3]+(plot.height*0.1))
start.nudge <- plot.width*0.02
arrows(x0 = (plot.dim[2]/2 - start.nudge), x1 = plot.dim[2]/4, y0 = arrow.height, lwd = 3)
text(x = (plot.dim[2]/2 - start.nudge), y = text.height, labels = "More LPS sensitive", adj = 1)
arrows(x0 = (plot.dim[2]/2 + start.nudge), x1 = plot.dim[2]*0.75, y0 = arrow.height, lwd = 3)
text(x = (plot.dim[2]/2 + start.nudge), y = text.height, labels = "More LPS resistant", adj = 0)
The animals in the upper ranges of the first PC of the temperature matrix, also tend to have higher measurements on in the immune matrix.
immune <- data_obj$pheno[,c("Il.1b", "Il.6", "TNF", "IFN.b")]
norm.immune <- apply(immune, 2, rankZ)
par(mfrow = c(2,2))
for(i in 1:ncol(norm.immune)){
plot.with.model(temp.pc, immune[,i], xlab = "Temperature PC 1",
ylab = colnames(norm.immune)[i],
main = colnames(norm.immune)[i], report = "cor.test",
col = ordered_cl)
}
These correlations extend the range of the values. If we rank Z normalize the values, the correlations are even stronger. There is a floor effect in each of the immune molecules, which is most pronounce in IFN beta. There are animals with a range of temperature profiles that did not have measurable IFN beta.
par(mfrow = c(2,2))
for(i in 1:ncol(norm.immune)){
plot.with.model(temp.pc, norm.immune[,i], xlab = "Temperature PC 1",
ylab = paste("Normalized", colnames(norm.immune)[i]),
main = paste("Normalized", colnames(norm.immune)[i]), report = "cor.test",
col = ordered_cl)
}
The second temperature PC2 is negatively correlated with the cytokines.
par(mfrow = c(2,2))
for(i in 1:ncol(norm.immune)){
plot.with.model(temp.pc2, norm.immune[,i], xlab = "Temperature PC 2",
ylab = colnames(norm.immune)[i],
main = colnames(norm.immune)[i], report = "cor.test",
col = ordered_cl)
}
As an initial test, we mapped each of the normalized traits.
The first PC of the temperature matrix has a suggestive QTL on proximal Chr 17. Maybe the MHC locus? IL6 and TNF also map to Chr 17, although the QTLs look more distal. Neither IL1 beta nor IFN beta map anywhere.
genoprobs <- calc_genoprob(lps)
full.pheno <- cbind(temp.pc, temp.pc2, norm.immune)
temp.scan <- scan1(genoprobs, full.pheno)
temp.perm <- scan1perm(genoprobs, full.pheno, n_perm = 100)
alpha <- c(0.05, 0.01, 0.1)
sig.level <- summary(temp.perm, alpha = alpha)
for(ph in 1:ncol(full.pheno)){
cat("###", colnames(full.pheno)[ph], "\n")
plot(temp.scan, lodcol = ph, map = lps$pmap, ylim = c(0, max(c(max(temp.scan), max(sig.level)*1.05))),
main = colnames(full.pheno)[ph])
plot.dim <- par("usr")
plot.width <- plot.dim[2] - plot.dim[1]
text.x <- plot.dim[2]-(plot.width*0.007)
line.stop <- plot.dim[2]-(plot.width*0.04)
segments(x0 = 0, x1 = line.stop, y0 = sig.level[,ph], lty = 2)
text(x = text.x, y = sig.level[,ph], labels = alpha, adj = 1)
cat("\n\n")
}
Now we run cape with the combined temperature PC1 and cytokines. Please see cape_results.html for results from cape.
#run cape with an additive coding for all markers
if(cape_type == "additive"){
results_dir <- here("Results", results_name)
final_obj <- run_cape(data_obj, geno_obj,
param_file = file.path(results_dir, "0_lps.parameters_0.yml"),
results_path = results_dir)
}
## Removing unused markers...
## Checking for invariant markers.
## Running pheno2covar...
## Plotting svd.pdf...
## Plotting svd.jpg...
## Selecting eigentraits...
## Saving data_obj into rds file...
## Running Singlescan...
## Running pairscan...
## Running reparameterization...
## Saving Variant_influences results...
## Saving Variant_Interactions results...
## Plotting Variant_influences.pdf...
## Plotting Variant_influences.jpg...
## Plotting Network_Circular.pdf...
## Plotting Network_Circular.jpg...
if(cape_type == "heterosis"){
#run cape with a coding for heterosis.
#AA and BB are 0 and A/B is 1
code_heterosis <- function(marker_geno){
new.geno <- matrix(0, nrow = nrow(marker_geno), ncol = ncol(marker_geno))
dimnames(new.geno) <- dimnames(marker_geno)
which.hom <- union(which(marker_geno[,1] > 0.9), which(marker_geno[,2] > 0.9))
new.geno[which.hom,1] <- 1
which.het <- intersect(which(marker_geno[,1] < 0.6), which(marker_geno[,2] < 0.6))
new.geno[which.het,2] <- 1
return(new.geno)
}
heterosis_markers <- lapply(1:dim(geno_obj)[3], function(x) code_heterosis(geno_obj[,,x]))
heterosis_geno <- abind(heterosis_markers, along = 3)
dimnames(heterosis_geno) <- dimnames(geno_obj)
results_dir <- here("Results", results_name)
final_obj <- run_cape(data_obj, heterosis_geno,
param_file = file.path(results_dir, "0_lps.parameters_0.yml"),
results_path = results_dir)
}
if(cape_type == "heterosis17"){
#run cape with a coding for heterosis only on chromosome 17.
#AA and BB are 0 and A/B is 1
code_heterosis <- function(marker_geno, recode = TRUE){
if(!recode){
return(marker_geno)
}
new.geno <- matrix(0, nrow = nrow(marker_geno), ncol = ncol(marker_geno))
dimnames(new.geno) <- dimnames(marker_geno)
which.hom <- union(which(marker_geno[,1] > 0.9), which(marker_geno[,2] > 0.9))
new.geno[which.hom,1] <- 1
which.het <- intersect(which(marker_geno[,1] < 0.6), which(marker_geno[,2] < 0.6))
new.geno[which.het,2] <- 1
return(new.geno)
}
do.recode <- rep(FALSE, dim(geno_obj)[[3]])
chr17.idx <- which(data_obj$chromosome == 17)
do.recode[chr17.idx] <- TRUE
heterosis_markers <- lapply(1:dim(geno_obj)[3], function(x) code_heterosis(geno_obj[,,x], recode = do.recode[x]))
heterosis_geno <- abind(heterosis_markers, along = 3)
dimnames(heterosis_geno) <- dimnames(geno_obj)
results_dir <- here("Results", results_name)
final_obj <- run_cape(data_obj, heterosis_geno,
param_file = file.path(results_dir, "0_lps.parameters_0.yml"),
results_path = results_dir)
}